######################################################################
######################################################################
####            A estrada dos tijolos amarelos:                   ####
#### Como e por quê produzir trabalhos qualitativos transparentes?####
####    Amanda Domingos, Palloma Marciano & Virginia Rocha        ####                            
####      (Corresponding author: amanda.domingos@ufpe.br)         ####                      
######################################################################
######################################################################
  library(readr)
  library(tidyverse)
  library(plyr)
  library(dplyr)
  library("wordcloud")
  library(scales)

# Load the analysis datasets. Paths are relative to the working directory.
# `total` is summarised by journal and by year further down; the three
# `freq_*` tables each provide a `term` column that is tabulated below.
total <- read.csv(file = "Data/AnalysisData/total.csv")
freq_quali <- read.csv(file = "Data/AnalysisData/quali.csv")
freq_quanti <- read.csv(file = "Data/AnalysisData/quanti.csv")
freq_trans <- read.csv(file = "Data/AnalysisData/transp.csv")


################
## Word cloud ##
################
# Tabulate how often each term occurs in the three term-level datasets.
# The namespace is spelled out on purpose: both plyr and dplyr export
# `count()`, and which one a bare call reaches depends on the order of the
# `library()` calls. Only plyr::count() returns the `freq` column read below.
# Only `quali` feeds the word cloud in this section.
quali <- plyr::count(freq_quali, vars = "term")
quanti <- plyr::count(freq_quanti, vars = "term")
transp <- plyr::count(freq_trans, vars = "term")

# Seed the RNG right before drawing so the layout can be reproduced.
set.seed(1234)

# Word cloud of the qualitative terms: at most 20 words, most frequent first
# (random.order = FALSE), drawn on the currently active graphics device.
wordcloud(
  words = quali$term,
  freq = quali$freq,
  min.freq = 1,
  max.words = 20,
  random.order = FALSE,
  rot.per = 0.15,
  colors = brewer.pal(8, "Dark2")
)

##########################
## Relative frequencies ##
##########################

# Tabulate a vector of terms.
#
# Args:
#   terms: vector of term labels (one element per occurrence).
# Returns:
#   A data frame with one row per distinct term and the columns
#     Var1  - the term, as a factor whose levels are sorted by decreasing rfreq
#     Freq  - number of occurrences of the term
#     rfreq - Freq divided by the number of distinct terms
#
# NOTE(review): rfreq divides by the number of rows of the frequency table
# (distinct terms), not by the total number of occurrences, so it is not a
# proportion. The y-axis limits of the qualitative bar chart look tuned to
# this scale, so the computation is kept as is -- confirm the intended
# definition before changing it.
term_freq_table <- function(terms) {
  tab <- as.data.frame(table(terms))
  # Keep the column names the plotting code expects.
  names(tab) <- c("Var1", "Freq")
  tab$rfreq <- tab$Freq / nrow(tab)
  by_rfreq <- order(tab$rfreq, decreasing = TRUE)
  tab$Var1 <- factor(tab$Var1, levels = tab$Var1[by_rfreq])
  tab
}

# Qualitative dataset (replaces the raw table with its frequency table)
freq_quali <- term_freq_table(freq_quali[["term"]])

# Quantitative dataset (replaces the raw table with its frequency table)
freq_quanti <- term_freq_table(freq_quanti[["term"]])


##########################
## Statistical analysis ##
##########################

# Horizontal bar chart of the qualitative techniques, written to
# Output/frequali.png.
# The display labels are unnamed, so they are matched to the bars by
# position, i.e. in the order of the levels of freq_quali$Var1.
labelquali <- c(
  "Entrevista", "Estudo de caso", "Análise de conteúdo", "Etnografia",
  "Entrevista em profundidade", "Análise documental", "Análise de discurso",
  "Process tracing", "QCA", "Obs. participante", "Grupo focal",
  "Grounded theory", "História de vida", "CMA", "Retórica"
)

g1 <- ggplot(data = freq_quali, mapping = aes(x = Var1, y = rfreq)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  scale_x_discrete(labels = labelquali) +
  xlab("") +
  ylab("") +
  ylim(0, 15) +
  theme_bw(base_size = 20)

# Open the PNG device only once the plot object exists, then render and close.
png("Output/frequali.png")
print(g1)
dev.off()


# Horizontal bar chart of the quantitative techniques, written to
# Output/frequanti.png.
# The display labels are unnamed, so they are matched to the bars by
# position, i.e. in the order of the levels of freq_quanti$Var1.
labelquanti <- c(
  "Survey", "Regressão", "Experimentos", "Análise de Cluster",
  "Análise de Texto", "RDD", "Análise de Sobrevivência",
  "Pareamento", "Experimentos Naturais"
)

g2 <- ggplot(data = freq_quanti, mapping = aes(x = Var1, y = rfreq)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  scale_x_discrete(labels = labelquanti) +
  xlab("") +
  ylab("") +
  theme_bw(base_size = 20)

# Open the PNG device only once the plot object exists, then render and close.
png("Output/frequanti.png")
print(g2)
dev.off()

# Summary statistics of each approach per journal: mean, standard deviation,
# median and total of the `quali`, `quanti` and `transp` columns of `total`.
#
# Columns are read through the `.data` pronoun so that every statistic is
# computed on the rows of the current journal only. Do not write
# `total$quali` inside summarize(): that reads the whole, ungrouped column
# and gives every journal the same overall value. `.data$` also keeps the
# column lookup from falling back to the global data frames named `quali`,
# `quanti` and `transp`.
journal_quali <- total %>%
  dplyr::group_by(journal) %>%
  dplyr::summarize(
    mean = mean(.data$quali, na.rm = TRUE),
    sd = sd(.data$quali, na.rm = TRUE),
    median = median(.data$quali, na.rm = TRUE),
    sum = sum(.data$quali, na.rm = TRUE)
  )

journal_quanti <- total %>%
  dplyr::group_by(journal) %>%
  dplyr::summarize(
    mean = mean(.data$quanti, na.rm = TRUE),
    sd = sd(.data$quanti, na.rm = TRUE),
    median = median(.data$quanti, na.rm = TRUE),
    sum = sum(.data$quanti, na.rm = TRUE)
  )

journal_transp <- total %>%
  dplyr::group_by(journal) %>%
  dplyr::summarize(
    mean = mean(.data$transp, na.rm = TRUE),
    sd = sd(.data$transp, na.rm = TRUE),
    median = median(.data$transp, na.rm = TRUE),
    sum = sum(.data$transp, na.rm = TRUE)
  )


# Summary statistics of each approach per year: mean, standard deviation,
# median and total of the `quali`, `quanti` and `transp` columns of `total`.
#
# Columns are read through the `.data` pronoun because global data frames
# with the same names (`quali`, `quanti`, `transp`) exist in this script;
# `.data$` guarantees the column of `total` is the one being summarised.
# mean() is used instead of calling the S3 method mean.default() directly.
ano_quali <- total %>%
  dplyr::group_by(year) %>%
  dplyr::summarise(
    mean = mean(.data$quali, na.rm = TRUE),
    sd = sd(.data$quali, na.rm = TRUE),
    median = median(.data$quali, na.rm = TRUE),
    sum = sum(.data$quali, na.rm = TRUE)
  )

ano_quanti <- total %>%
  dplyr::group_by(year) %>%
  dplyr::summarise(
    mean = mean(.data$quanti, na.rm = TRUE),
    sd = sd(.data$quanti, na.rm = TRUE),
    median = median(.data$quanti, na.rm = TRUE),
    sum = sum(.data$quanti, na.rm = TRUE)
  )

ano_transp <- total %>%
  dplyr::group_by(year) %>%
  dplyr::summarise(
    mean = mean(.data$transp, na.rm = TRUE),
    sd = sd(.data$transp, na.rm = TRUE),
    median = median(.data$transp, na.rm = TRUE),
    sum = sum(.data$transp, na.rm = TRUE)
  )


# Number of uses of each approach per year, one line chart per approach.
# Each chart plots the yearly `sum` and marks one reference year with a red
# vertical line.
# NOTE(review): the meaning of the marked years (1994, 1996, 2013) is not
# stated in this script -- presumably milestones discussed in the paper.

# Qualitative approaches -> Output/anoquali.png
g4 <- ggplot(data = ano_quali, mapping = aes(x = year, y = sum)) +
  geom_line() +
  xlab("") +
  ylab("") +
  geom_vline(xintercept = 1994, colour = "red") +
  theme_bw(base_size = 20)
png("Output/anoquali.png")
print(g4)
dev.off()

# Quantitative approaches -> Output/anoquanti.png
g5 <- ggplot(data = ano_quanti, mapping = aes(x = year, y = sum)) +
  geom_line() +
  xlab("") +
  ylab("") +
  geom_vline(xintercept = 1996, colour = "red") +
  theme_bw(base_size = 20)
png("Output/anoquanti.png")
print(g5)
dev.off()

# Transparency -> Output/anotransp.png
# The year column is coerced to numeric before plotting.
ano_transp$year <- as.numeric(ano_transp$year)
g6 <- ggplot(data = ano_transp, mapping = aes(x = year, y = sum)) +
  geom_line() +
  xlab("") +
  ylab("") +
  geom_vline(xintercept = 2013, colour = "red") +
  theme_bw(base_size = 20)
png("Output/anotransp.png")
print(g6)
dev.off()


  ### Combined line chart: the three yearly series on a single plot,
  ### distinguished by line type, written to Output/abordagens_linha.png.
  # NOTE(review): `size` for line width is deprecated in recent ggplot2
  # releases in favour of `linewidth`; kept here so older versions still work.
  gj <- ggplot() +
    geom_line(
      data = ano_quali,
      mapping = aes(x = year, y = sum, linetype = "Qualitativa"),
      size = 0.7
    ) +
    geom_line(
      data = ano_quanti,
      mapping = aes(x = year, y = sum, linetype = "Quantitativa"),
      size = 0.7
    ) +
    geom_line(
      data = ano_transp,
      mapping = aes(x = year, y = sum, linetype = "Transparência"),
      size = 0.7
    ) +
    # Map each series label to its line type and title the legend.
    scale_linetype_manual(
      name = "Abordagem",
      values = c(
        "Qualitativa" = "solid",
        "Quantitativa" = "dotted",
        "Transparência" = "longdash"
      )
    ) +
    ylab(" ") +
    xlab(" ") +
    theme_classic(base_size = 14) +
    # Grey reference lines at the same years marked in the individual charts.
    geom_vline(
      xintercept = c(1994, 1996, 2013),
      linetype = "solid",
      colour = "gray"
    )
  png("Output/abordagens_linha.png")
  print(gj)
  dev.off()

# Total use of each approach per journal, one horizontal bar chart per
# approach. Journals are reordered by their `sum` so the bars appear sorted.

# Qualitative approaches -> Output/journalquali.png
g7 <- ggplot(
  data = mutate(journal_quali, journal = fct_reorder(journal, sum)),
  mapping = aes(x = journal, y = sum)
) +
  geom_bar(stat = "identity", alpha = .6, width = .4) +
  coord_flip() +
  xlab("") +
  ylab("") +
  theme_bw(base_size = 20)
png("Output/journalquali.png")
print(g7)
dev.off()

# Quantitative approaches -> Output/journalquanti.png
g8 <- ggplot(
  data = mutate(journal_quanti, journal = fct_reorder(journal, sum)),
  mapping = aes(x = journal, y = sum)
) +
  geom_bar(stat = "identity", alpha = .6, width = .4) +
  coord_flip() +
  xlab("") +
  ylab("") +
  theme_bw(base_size = 20)
png("Output/journalquanti.png")
print(g8)
dev.off()

# Transparency -> Output/journaltransp.png
# `sum` is resolved inside journal_transp, the same values as
# journal_transp$sum.
g9 <- ggplot(
  data = mutate(journal_transp, journal = fct_reorder(journal, sum)),
  mapping = aes(x = journal, y = sum)
) +
  geom_bar(stat = "identity", alpha = .6, width = .4) +
  coord_flip() +
  xlab("") +
  ylab("") +
  theme_bw(base_size = 20)
png("Output/journaltransp.png")
print(g9)
dev.off()
